{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 安装pandas,numpy\n",
    "- 直接安装anaconda(集成环境)\n",
    "- 利用pandas保存数据"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 抓取中国票房网站的数据\n",
    "- URL = 'www.cbooo.cn'(下方代码实际请求的接口是 'https://www.endata.com.cn/API/GetData.ashx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'Movieid': 669412,\n",
       "  'MovieName': '八佰',\n",
       "  'Genre_Main': '战争',\n",
       "  'BoxOffice': 206778,\n",
       "  'AvgPrice': 38,\n",
       "  'AvgPeoPle': 31.0,\n",
       "  'Area': '中国',\n",
       "  'ReleaseTime': '2020-08-21',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group1/M00/06/76/wKgASV8naXmAbQuEAABGZEs2NKg717.jpg'},\n",
       " {'Movieid': 685447,\n",
       "  'MovieName': '宠爱',\n",
       "  'Genre_Main': '剧情',\n",
       "  'BoxOffice': 51030,\n",
       "  'AvgPrice': 35,\n",
       "  'AvgPeoPle': 8.0,\n",
       "  'Area': '中国',\n",
       "  'ReleaseTime': '2019-12-31',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group2/M00/02/97/wKgAS138anyABrGMAABqXWMS8J0285.jpg'},\n",
       " {'Movieid': 693266,\n",
       "  'MovieName': '误杀',\n",
       "  'Genre_Main': '剧情',\n",
       "  'BoxOffice': 49929,\n",
       "  'AvgPrice': 33,\n",
       "  'AvgPeoPle': 8.0,\n",
       "  'Area': '中国',\n",
       "  'ReleaseTime': '2019-12-13',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group2/M00/02/96/wKgAS13gt0WAS3CjAABoTRx_PZ8927.jpg'},\n",
       " {'Movieid': 684830,\n",
       "  'MovieName': '我在时间尽头等你',\n",
       "  'Genre_Main': '爱情',\n",
       "  'BoxOffice': 43905,\n",
       "  'AvgPrice': 35,\n",
       "  'AvgPeoPle': 22.0,\n",
       "  'Area': '中国',\n",
       "  'ReleaseTime': '2020-08-25',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group2/M00/03/28/wKgAS18Y9LGASDYpAABfnHubW8I079.jpg'},\n",
       " {'Movieid': 624015,\n",
       "  'MovieName': '叶问4：完结篇',\n",
       "  'Genre_Main': '动作',\n",
       "  'BoxOffice': 41837,\n",
       "  'AvgPrice': 36,\n",
       "  'AvgPeoPle': 7.0,\n",
       "  'Area': '中国/中国香港',\n",
       "  'ReleaseTime': '2019-12-20',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group1/M00/05/2C/wKgASV34PqSAQliYAABmxNg1oI0829.jpg'},\n",
       " {'Movieid': 412178,\n",
       "  'MovieName': '哈利·波特与魔法石',\n",
       "  'Genre_Main': '魔幻/动作',\n",
       "  'BoxOffice': 18665,\n",
       "  'AvgPrice': 35,\n",
       "  'AvgPeoPle': 9.0,\n",
       "  'Area': '英国',\n",
       "  'ReleaseTime': '2002-01-31',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group1/M00/00/C4/wKgASVznzZqAOQukAAC7ydXidio865.jpg'},\n",
       " {'Movieid': 662461,\n",
       "  'MovieName': '多力特的奇幻冒险',\n",
       "  'Genre_Main': '喜剧',\n",
       "  'BoxOffice': 13587,\n",
       "  'AvgPrice': 32,\n",
       "  'AvgPeoPle': 7.0,\n",
       "  'Area': '美国',\n",
       "  'ReleaseTime': '2020-07-24',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group1/M00/06/48/wKgASV8ZBp6AS4BUAABz6vg4BUc187.jpg'},\n",
       " {'Movieid': 346481,\n",
       "  'MovieName': '星际穿越',\n",
       "  'Genre_Main': '科幻',\n",
       "  'BoxOffice': 12141,\n",
       "  'AvgPrice': 30,\n",
       "  'AvgPeoPle': 8.0,\n",
       "  'Area': '美国/英国',\n",
       "  'ReleaseTime': '2014-11-12',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group2/M00/00/6E/wKgASlznz4-AIYUnAADYYros590752.jpg'},\n",
       " {'Movieid': 696120,\n",
       "  'MovieName': '我为你牺牲',\n",
       "  'Genre_Main': '剧情',\n",
       "  'BoxOffice': 10610,\n",
       "  'AvgPrice': 71,\n",
       "  'AvgPeoPle': 54.0,\n",
       "  'Area': '中国',\n",
       "  'ReleaseTime': '2019-12-05',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group2/M00/02/96/wKgASl3fKoOARwJfAAB7brwyKHk155.jpg'},\n",
       " {'Movieid': 694783,\n",
       "  'MovieName': '变身特工',\n",
       "  'Genre_Main': '动画',\n",
       "  'BoxOffice': 10340,\n",
       "  'AvgPrice': 35,\n",
       "  'AvgPeoPle': 6.0,\n",
       "  'Area': '美国',\n",
       "  'ReleaseTime': '2020-01-03',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group2/M00/02/98/wKgAS14Jq7-AcnA_AAB3X71QAZw413.jpg'},\n",
       " {'Movieid': 682205,\n",
       "  'MovieName': '1917',\n",
       "  'Genre_Main': '战争',\n",
       "  'BoxOffice': 6876,\n",
       "  'AvgPrice': 32,\n",
       "  'AvgPeoPle': 5.0,\n",
       "  'Area': '美国',\n",
       "  'ReleaseTime': '2020-08-07',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group2/M00/03/2A/wKgAS18aTLmAPIE6AABrkIC-9Vk440.jpg'},\n",
       " {'Movieid': 456236,\n",
       "  'MovieName': '美丽人生',\n",
       "  'Genre_Main': '剧情',\n",
       "  'BoxOffice': 5808,\n",
       "  'AvgPrice': 34,\n",
       "  'AvgPeoPle': 6.0,\n",
       "  'Area': '意大利',\n",
       "  'ReleaseTime': '2020-01-03',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group1/M00/05/2C/wKgASV4Cu6mAE-wSAABqfk7d30c272.jpg'},\n",
       " {'Movieid': 694876,\n",
       "  'MovieName': '天使陷落',\n",
       "  'Genre_Main': '动作',\n",
       "  'BoxOffice': 5506,\n",
       "  'AvgPrice': 33,\n",
       "  'AvgPeoPle': 6.0,\n",
       "  'Area': '美国',\n",
       "  'ReleaseTime': '2019-12-31',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group1/M00/05/2D/wKgASV4JtDSAB2UzAACCfuBOj9o001.jpg'},\n",
       " {'Movieid': 673894,\n",
       "  'MovieName': '荞麦疯长',\n",
       "  'Genre_Main': '剧情',\n",
       "  'BoxOffice': 5032,\n",
       "  'AvgPrice': 36,\n",
       "  'AvgPeoPle': 11.0,\n",
       "  'Area': '中国',\n",
       "  'ReleaseTime': '2020-08-25',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group1/M00/07/43/wKgASV86Qt6AHHTLAABujfqw0-o689.jpg'},\n",
       " {'Movieid': 695496,\n",
       "  'MovieName': '紫罗兰永恒花园外传：永远与自动手记人偶',\n",
       "  'Genre_Main': '剧情',\n",
       "  'BoxOffice': 4853,\n",
       "  'AvgPrice': 30,\n",
       "  'AvgPeoPle': 4.0,\n",
       "  'Area': '日本',\n",
       "  'ReleaseTime': '2020-01-10',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group1/M00/07/2D/wKgASV86MRuAfszjAACSzXdcoxU388.jpg'},\n",
       " {'Movieid': 694805,\n",
       "  'MovieName': '鲨海逃生',\n",
       "  'Genre_Main': '灾难',\n",
       "  'BoxOffice': 4752,\n",
       "  'AvgPrice': 30,\n",
       "  'AvgPeoPle': 4.0,\n",
       "  'Area': '美国/英国',\n",
       "  'ReleaseTime': '2020-01-10',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group1/M00/05/2D/wKgASV4O30WABBFVAABxlwD5lSo472.jpg'},\n",
       " {'Movieid': 691481,\n",
       "  'MovieName': '我和我的祖国',\n",
       "  'Genre_Main': '剧情',\n",
       "  'BoxOffice': 4750,\n",
       "  'AvgPrice': 79,\n",
       "  'AvgPeoPle': 302.0,\n",
       "  'Area': '中国/中国香港',\n",
       "  'ReleaseTime': '2019-09-30',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group1/M00/05/1F/wKgASV1-9TiAGhZmAACCZzeu0MY565.jpg'},\n",
       " {'Movieid': 679955,\n",
       "  'MovieName': '半个喜剧',\n",
       "  'Genre_Main': '喜剧',\n",
       "  'BoxOffice': 4568,\n",
       "  'AvgPrice': 36,\n",
       "  'AvgPeoPle': 7.0,\n",
       "  'Area': '中国',\n",
       "  'ReleaseTime': '2019-12-20',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group2/M00/02/96/wKgAS13kb0aAAb3JAACGkP3ufwg691.jpg'},\n",
       " {'Movieid': 695823,\n",
       "  'MovieName': '喋血战士',\n",
       "  'Genre_Main': '动作',\n",
       "  'BoxOffice': 4552,\n",
       "  'AvgPrice': 29,\n",
       "  'AvgPeoPle': 6.0,\n",
       "  'Area': '美国',\n",
       "  'ReleaseTime': '2020-07-24',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group1/M00/06/46/wKgASV8X-iKAe9ihAABdeasQRLk396.jpg'},\n",
       " {'Movieid': 695739,\n",
       "  'MovieName': '1/2的魔法',\n",
       "  'Genre_Main': '动画',\n",
       "  'BoxOffice': 4430,\n",
       "  'AvgPrice': 32,\n",
       "  'AvgPeoPle': 8.0,\n",
       "  'Area': '美国',\n",
       "  'ReleaseTime': '2020-08-19',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group2/M00/03/57/wKgASl8suI6ALbwCAAB5PR0UPE4862.jpg'},\n",
       " {'Movieid': 679292,\n",
       "  'MovieName': '动物特工局',\n",
       "  'Genre_Main': '动画',\n",
       "  'BoxOffice': 4417,\n",
       "  'AvgPrice': 32,\n",
       "  'AvgPeoPle': 4.0,\n",
       "  'Area': '中国/法国',\n",
       "  'ReleaseTime': '2020-01-11',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group2/M00/02/97/wKgASl3oa5OAIc2BAACIc3tzqtY217.jpg'},\n",
       " {'Movieid': 676432,\n",
       "  'MovieName': '冰雪奇缘2',\n",
       "  'Genre_Main': '动画',\n",
       "  'BoxOffice': 3805,\n",
       "  'AvgPrice': 34,\n",
       "  'AvgPeoPle': 7.0,\n",
       "  'Area': '美国',\n",
       "  'ReleaseTime': '2019-11-22',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group2/M00/02/93/wKgAS12lOvCAP93oAACEwKNAR90206.jpg'},\n",
       " {'Movieid': 695236,\n",
       "  'MovieName': '熊出没·狂野大陆',\n",
       "  'Genre_Main': '动画',\n",
       "  'BoxOffice': 3673,\n",
       "  'AvgPrice': 36,\n",
       "  'AvgPeoPle': 13.0,\n",
       "  'Area': '中国',\n",
       "  'ReleaseTime': None,\n",
       "  'defaultImage': 'https://images.entgroup.cn/group2/M00/02/97/wKgASl38bpeAW2i9AAB0zapEpBM220.jpg'},\n",
       " {'Movieid': 681780,\n",
       "  'MovieName': '小妇人',\n",
       "  'Genre_Main': '剧情',\n",
       "  'BoxOffice': 3663,\n",
       "  'AvgPrice': 33,\n",
       "  'AvgPeoPle': 10.0,\n",
       "  'Area': '美国',\n",
       "  'ReleaseTime': '2020-08-25',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group2/M00/03/57/wKgAS18szYmAfJs6AABBgabJ5qY738.jpg'},\n",
       " {'Movieid': 573724,\n",
       "  'MovieName': '绝地战警：疾速追击',\n",
       "  'Genre_Main': '动作',\n",
       "  'BoxOffice': 3505,\n",
       "  'AvgPrice': 30,\n",
       "  'AvgPeoPle': 5.0,\n",
       "  'Area': '美国',\n",
       "  'ReleaseTime': '2020-08-14',\n",
       "  'defaultImage': 'https://images.entgroup.cn/group1/M00/06/4C/wKgASV8aUp-AUxupAAB7gF-9inE068.jpg'}]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "import json\n",
    "\n",
    "# JSON API endpoint; the 'MethodName' form field selects which dataset\n",
    "# the server returns.\n",
    "url = \"https://www.endata.com.cn/API/GetData.ashx\"\n",
    "\n",
    "# Form payload for the yearly box-office listing.\n",
    "data = {\n",
    "    'year': 2020,\n",
    "    'MethodName': 'BoxOffice_GetYearInfoData'\n",
    "}\n",
    "\n",
    "# requests applies no timeout by default, so set one to keep a full re-run\n",
    "# from hanging, and fail loudly on an HTTP error instead of handing an\n",
    "# error page to the JSON parser.\n",
    "response = requests.post(url, data=data, timeout=10)\n",
    "response.raise_for_status()\n",
    "\n",
    "datas = response.json()\n",
    "# One dict per movie (Movieid, MovieName, Genre_Main, BoxOffice, ...).\n",
    "table = datas['Data']['Table']\n",
    "table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['八佰',\n",
       " '宠爱',\n",
       " '误杀',\n",
       " '我在时间尽头等你',\n",
       " '叶问4：完结篇',\n",
       " '哈利·波特与魔法石',\n",
       " '多力特的奇幻冒险',\n",
       " '星际穿越',\n",
       " '我为你牺牲',\n",
       " '变身特工',\n",
       " '1917',\n",
       " '美丽人生',\n",
       " '天使陷落',\n",
       " '荞麦疯长',\n",
       " '紫罗兰永恒花园外传：永远与自动手记人偶',\n",
       " '鲨海逃生',\n",
       " '我和我的祖国',\n",
       " '半个喜剧',\n",
       " '喋血战士',\n",
       " '1/2的魔法',\n",
       " '动物特工局',\n",
       " '冰雪奇缘2',\n",
       " '熊出没·狂野大陆',\n",
       " '小妇人',\n",
       " '绝地战警：疾速追击']"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Movie titles, in the same order as the rows of `table`\n",
    "names = [movie['MovieName'] for movie in table]\n",
    "names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=669412',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=685447',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=693266',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=684830',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=624015',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=412178',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=662461',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=346481',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=696120',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=694783',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=682205',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=456236',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=694876',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=673894',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=695496',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=694805',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=691481',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=679955',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=695823',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=695739',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=679292',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=676432',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=695236',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=681780',\n",
       " 'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id=573724']"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Detail-page URL for every movie, built from its 'Movieid'.\n",
    "# A comprehension keeps the loop variable local, so the builtin `id`\n",
    "# is not shadowed in the notebook namespace.\n",
    "hrefs = [\n",
    "    'https://www.endata.com.cn/BoxOffice/MovieStock/movieShow.html?id={}'.format(movie['Movieid'])\n",
    "    for movie in table\n",
    "]\n",
    "hrefs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['2020-08-21',\n",
       " '2019-12-31',\n",
       " '2019-12-13',\n",
       " '2020-08-25',\n",
       " '2019-12-20',\n",
       " '2002-01-31',\n",
       " '2020-07-24',\n",
       " '2014-11-12',\n",
       " '2019-12-05',\n",
       " '2020-01-03',\n",
       " '2020-08-07',\n",
       " '2020-01-03',\n",
       " '2019-12-31',\n",
       " '2020-08-25',\n",
       " '2020-01-10',\n",
       " '2020-01-10',\n",
       " '2019-09-30',\n",
       " '2019-12-20',\n",
       " '2020-07-24',\n",
       " '2020-08-19',\n",
       " '2020-01-11',\n",
       " '2019-11-22',\n",
       " None,\n",
       " '2020-08-25',\n",
       " '2020-08-14']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Main genre of each movie\n",
    "genre_main = [movie['Genre_Main'] for movie in table]\n",
    "# Total box office (unit given by the API's site as 万, i.e. 10,000)\n",
    "boxoffice = [movie['BoxOffice'] for movie in table]\n",
    "# Average ticket price\n",
    "avgprice = [movie['AvgPrice'] for movie in table]\n",
    "# Average attendance per screening; the API returns floats such as 31.0,\n",
    "# which int() truncates to whole numbers\n",
    "avgpepole = [int(movie['AvgPeoPle']) for movie in table]\n",
    "# Country / region of origin\n",
    "area = [movie['Area'] for movie in table]\n",
    "# Release date as a 'YYYY-MM-DD' string; may be None when the API has no date\n",
    "releasetime = [movie['ReleaseTime'] for movie in table]\n",
    "releasetime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['管虎 Hu Guan|12276/',\n",
       " '杨子 Larry Yang|2226264/',\n",
       " '柯汶利 Sam Quah|2354557/',\n",
       " '姚婷婷 Ting Ting Yao|2024527/',\n",
       " '叶伟信 Wilson Yip|2230/',\n",
       " '克里斯·哥伦布 Chris Columbus|538101/',\n",
       " '杨乐 Le Yang|1871089/',\n",
       " '克里斯托弗·诺兰 Christopher Nolan|66379/',\n",
       " '安战军 Zhanjun An|2475/',\n",
       " '特洛伊·奎安 Troy Quane|511866/尼克·布鲁诺 Nick Bruno|1148029/',\n",
       " '萨姆·门德斯 Sam Mendes|372050/',\n",
       " '罗伯托·贝尼尼 Roberto Benigni|134559/',\n",
       " '里克·罗曼·沃夫 Ric Roman Waugh|227389/',\n",
       " '徐展雄 Xu Zhanxiong|2181448/',\n",
       " '藤田春香 Fujita Haruka|2399895/',\n",
       " '约翰内斯·罗伯茨 Johannes Roberts|2248427/',\n",
       " '陈凯歌 Kaige Chen|403/张一白 Yibai Zhang|27/管虎 Hu Guan|12276/薛晓路 Xiaolu Xue|4291/徐峥 Zheng Xu|94/宁浩 Hao Ning|3337/文牧野 Muye Wen|2226550/',\n",
       " '刘露 Lu Liu|2270668/周申 |2179646/',\n",
       " '戴夫·威尔逊 Dave Wilson|2368991/',\n",
       " '丹·斯坎隆 Dan·ShiKanLong|2022077/',\n",
       " '张志一 Zhiyi Zhang|2303433/纪约姆·伊弗奈 Guillaume Ivernel|2396367/',\n",
       " '珍妮弗·李 Jennifer Lee|2189071/克里斯·巴克 Chris Buck|464431/',\n",
       " '丁亮 Leon Ding|2349150/邵和麒 Heqi Shao|2228948/',\n",
       " '格蕾塔·葛韦格 Greta Gerwig|240367/',\n",
       " '阿迪尔·埃尔·阿比 Adil El Arbi|2369998/比拉勒·法拉赫 Bilall Fallah|2399166/']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def getInfo(id, timeout=10):\n",
    "    '''Fetch the director field from a movie's detail record.\n",
    "\n",
    "    Posts to the notebook-level `url` endpoint using the\n",
    "    'BoxOffice_GetMovieData_Details' method.\n",
    "\n",
    "    Args:\n",
    "        id: Movie id, sent as the 'movieId' form field.\n",
    "        timeout: Seconds to wait for the server before giving up.\n",
    "\n",
    "    Returns:\n",
    "        The 'MovieDyan' value of the first row of Data.Table. In the\n",
    "        sample output this is a string of 'name|number/' entries, one\n",
    "        per director.\n",
    "\n",
    "    Raises:\n",
    "        requests.HTTPError: If the server answers with an error status.\n",
    "    '''\n",
    "    payload = {\n",
    "        'movieId': id,\n",
    "        'MethodName': 'BoxOffice_GetMovieData_Details'\n",
    "    }\n",
    "    # requests applies no timeout by default; without one a stalled\n",
    "    # connection would block the whole notebook run.\n",
    "    response = requests.post(url, data=payload, timeout=timeout)\n",
    "    # Surface HTTP failures here rather than as a confusing JSON/KeyError below.\n",
    "    response.raise_for_status()\n",
    "    details = response.json()\n",
    "    return details['Data']['Table'][0]['MovieDyan']\n",
    "\n",
    "\n",
    "# Directors: one detail request per movie. The id is read directly from\n",
    "# `table` instead of slicing the last six characters of the URL, so ids\n",
    "# with any number of digits are handled correctly.\n",
    "directors = [getInfo(movie['Movieid']) for movie in table]\n",
    "directors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>hrefs</th>\n",
       "      <th>type</th>\n",
       "      <th>boxoffice</th>\n",
       "      <th>area</th>\n",
       "      <th>avgprice</th>\n",
       "      <th>avgpepole</th>\n",
       "      <th>releasetime</th>\n",
       "      <th>director</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>八佰</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>战争</td>\n",
       "      <td>206778</td>\n",
       "      <td>中国</td>\n",
       "      <td>38</td>\n",
       "      <td>31</td>\n",
       "      <td>2020-08-21</td>\n",
       "      <td>管虎 Hu Guan|12276/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>宠爱</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>剧情</td>\n",
       "      <td>51030</td>\n",
       "      <td>中国</td>\n",
       "      <td>35</td>\n",
       "      <td>8</td>\n",
       "      <td>2019-12-31</td>\n",
       "      <td>杨子 Larry Yang|2226264/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>误杀</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>剧情</td>\n",
       "      <td>49929</td>\n",
       "      <td>中国</td>\n",
       "      <td>33</td>\n",
       "      <td>8</td>\n",
       "      <td>2019-12-13</td>\n",
       "      <td>柯汶利 Sam Quah|2354557/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>我在时间尽头等你</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>爱情</td>\n",
       "      <td>43905</td>\n",
       "      <td>中国</td>\n",
       "      <td>35</td>\n",
       "      <td>22</td>\n",
       "      <td>2020-08-25</td>\n",
       "      <td>姚婷婷 Ting Ting Yao|2024527/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>叶问4：完结篇</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>动作</td>\n",
       "      <td>41837</td>\n",
       "      <td>中国/中国香港</td>\n",
       "      <td>36</td>\n",
       "      <td>7</td>\n",
       "      <td>2019-12-20</td>\n",
       "      <td>叶伟信 Wilson Yip|2230/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>哈利·波特与魔法石</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>魔幻/动作</td>\n",
       "      <td>18665</td>\n",
       "      <td>英国</td>\n",
       "      <td>35</td>\n",
       "      <td>9</td>\n",
       "      <td>2002-01-31</td>\n",
       "      <td>克里斯·哥伦布 Chris Columbus|538101/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>多力特的奇幻冒险</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>喜剧</td>\n",
       "      <td>13587</td>\n",
       "      <td>美国</td>\n",
       "      <td>32</td>\n",
       "      <td>7</td>\n",
       "      <td>2020-07-24</td>\n",
       "      <td>杨乐 Le Yang|1871089/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>星际穿越</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>科幻</td>\n",
       "      <td>12141</td>\n",
       "      <td>美国/英国</td>\n",
       "      <td>30</td>\n",
       "      <td>8</td>\n",
       "      <td>2014-11-12</td>\n",
       "      <td>克里斯托弗·诺兰 Christopher Nolan|66379/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>我为你牺牲</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>剧情</td>\n",
       "      <td>10610</td>\n",
       "      <td>中国</td>\n",
       "      <td>71</td>\n",
       "      <td>54</td>\n",
       "      <td>2019-12-05</td>\n",
       "      <td>安战军 Zhanjun An|2475/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>变身特工</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>动画</td>\n",
       "      <td>10340</td>\n",
       "      <td>美国</td>\n",
       "      <td>35</td>\n",
       "      <td>6</td>\n",
       "      <td>2020-01-03</td>\n",
       "      <td>特洛伊·奎安 Troy Quane|511866/尼克·布鲁诺 Nick Bruno|114...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1917</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>战争</td>\n",
       "      <td>6876</td>\n",
       "      <td>美国</td>\n",
       "      <td>32</td>\n",
       "      <td>5</td>\n",
       "      <td>2020-08-07</td>\n",
       "      <td>萨姆·门德斯 Sam Mendes|372050/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>美丽人生</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>剧情</td>\n",
       "      <td>5808</td>\n",
       "      <td>意大利</td>\n",
       "      <td>34</td>\n",
       "      <td>6</td>\n",
       "      <td>2020-01-03</td>\n",
       "      <td>罗伯托·贝尼尼 Roberto Benigni|134559/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>天使陷落</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>动作</td>\n",
       "      <td>5506</td>\n",
       "      <td>美国</td>\n",
       "      <td>33</td>\n",
       "      <td>6</td>\n",
       "      <td>2019-12-31</td>\n",
       "      <td>里克·罗曼·沃夫 Ric Roman Waugh|227389/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>荞麦疯长</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>剧情</td>\n",
       "      <td>5032</td>\n",
       "      <td>中国</td>\n",
       "      <td>36</td>\n",
       "      <td>11</td>\n",
       "      <td>2020-08-25</td>\n",
       "      <td>徐展雄 Xu Zhanxiong|2181448/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>紫罗兰永恒花园外传：永远与自动手记人偶</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>剧情</td>\n",
       "      <td>4853</td>\n",
       "      <td>日本</td>\n",
       "      <td>30</td>\n",
       "      <td>4</td>\n",
       "      <td>2020-01-10</td>\n",
       "      <td>藤田春香 Fujita Haruka|2399895/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>鲨海逃生</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>灾难</td>\n",
       "      <td>4752</td>\n",
       "      <td>美国/英国</td>\n",
       "      <td>30</td>\n",
       "      <td>4</td>\n",
       "      <td>2020-01-10</td>\n",
       "      <td>约翰内斯·罗伯茨 Johannes Roberts|2248427/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>我和我的祖国</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>剧情</td>\n",
       "      <td>4750</td>\n",
       "      <td>中国/中国香港</td>\n",
       "      <td>79</td>\n",
       "      <td>302</td>\n",
       "      <td>2019-09-30</td>\n",
       "      <td>陈凯歌 Kaige Chen|403/张一白 Yibai Zhang|27/管虎 Hu Gu...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>半个喜剧</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>喜剧</td>\n",
       "      <td>4568</td>\n",
       "      <td>中国</td>\n",
       "      <td>36</td>\n",
       "      <td>7</td>\n",
       "      <td>2019-12-20</td>\n",
       "      <td>刘露 Lu Liu|2270668/周申 |2179646/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>喋血战士</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>动作</td>\n",
       "      <td>4552</td>\n",
       "      <td>美国</td>\n",
       "      <td>29</td>\n",
       "      <td>6</td>\n",
       "      <td>2020-07-24</td>\n",
       "      <td>戴夫·威尔逊 Dave Wilson|2368991/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>1/2的魔法</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>动画</td>\n",
       "      <td>4430</td>\n",
       "      <td>美国</td>\n",
       "      <td>32</td>\n",
       "      <td>8</td>\n",
       "      <td>2020-08-19</td>\n",
       "      <td>丹·斯坎隆 Dan·ShiKanLong|2022077/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>动物特工局</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>动画</td>\n",
       "      <td>4417</td>\n",
       "      <td>中国/法国</td>\n",
       "      <td>32</td>\n",
       "      <td>4</td>\n",
       "      <td>2020-01-11</td>\n",
       "      <td>张志一 Zhiyi Zhang|2303433/纪约姆·伊弗奈 Guillaume Iver...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>冰雪奇缘2</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>动画</td>\n",
       "      <td>3805</td>\n",
       "      <td>美国</td>\n",
       "      <td>34</td>\n",
       "      <td>7</td>\n",
       "      <td>2019-11-22</td>\n",
       "      <td>珍妮弗·李 Jennifer Lee|2189071/克里斯·巴克 Chris Buck|4...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>熊出没·狂野大陆</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>动画</td>\n",
       "      <td>3673</td>\n",
       "      <td>中国</td>\n",
       "      <td>36</td>\n",
       "      <td>13</td>\n",
       "      <td>None</td>\n",
       "      <td>丁亮 Leon Ding|2349150/邵和麒 Heqi Shao|2228948/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>小妇人</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>剧情</td>\n",
       "      <td>3663</td>\n",
       "      <td>美国</td>\n",
       "      <td>33</td>\n",
       "      <td>10</td>\n",
       "      <td>2020-08-25</td>\n",
       "      <td>格蕾塔·葛韦格 Greta Gerwig|240367/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>绝地战警：疾速追击</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>动作</td>\n",
       "      <td>3505</td>\n",
       "      <td>美国</td>\n",
       "      <td>30</td>\n",
       "      <td>5</td>\n",
       "      <td>2020-08-14</td>\n",
       "      <td>阿迪尔·埃尔·阿比 Adil El Arbi|2369998/比拉勒·法拉赫 Bilall ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   name                                              hrefs  \\\n",
       "0                    八佰  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "1                    宠爱  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "2                    误杀  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "3              我在时间尽头等你  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "4               叶问4：完结篇  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "5             哈利·波特与魔法石  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "6              多力特的奇幻冒险  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "7                  星际穿越  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "8                 我为你牺牲  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "9                  变身特工  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "10                 1917  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "11                 美丽人生  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "12                 天使陷落  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "13                 荞麦疯长  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "14  紫罗兰永恒花园外传：永远与自动手记人偶  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "15                 鲨海逃生  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "16               我和我的祖国  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "17                 半个喜剧  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "18                 喋血战士  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "19               1/2的魔法  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "20                动物特工局  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "21                冰雪奇缘2  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "22             熊出没·狂野大陆  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "23                  小妇人  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "24            绝地战警：疾速追击  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "\n",
       "     type  boxoffice     area  avgprice  avgpepole releasetime  \\\n",
       "0      战争     206778       中国        38         31  2020-08-21   \n",
       "1      剧情      51030       中国        35          8  2019-12-31   \n",
       "2      剧情      49929       中国        33          8  2019-12-13   \n",
       "3      爱情      43905       中国        35         22  2020-08-25   \n",
       "4      动作      41837  中国/中国香港        36          7  2019-12-20   \n",
       "5   魔幻/动作      18665       英国        35          9  2002-01-31   \n",
       "6      喜剧      13587       美国        32          7  2020-07-24   \n",
       "7      科幻      12141    美国/英国        30          8  2014-11-12   \n",
       "8      剧情      10610       中国        71         54  2019-12-05   \n",
       "9      动画      10340       美国        35          6  2020-01-03   \n",
       "10     战争       6876       美国        32          5  2020-08-07   \n",
       "11     剧情       5808      意大利        34          6  2020-01-03   \n",
       "12     动作       5506       美国        33          6  2019-12-31   \n",
       "13     剧情       5032       中国        36         11  2020-08-25   \n",
       "14     剧情       4853       日本        30          4  2020-01-10   \n",
       "15     灾难       4752    美国/英国        30          4  2020-01-10   \n",
       "16     剧情       4750  中国/中国香港        79        302  2019-09-30   \n",
       "17     喜剧       4568       中国        36          7  2019-12-20   \n",
       "18     动作       4552       美国        29          6  2020-07-24   \n",
       "19     动画       4430       美国        32          8  2020-08-19   \n",
       "20     动画       4417    中国/法国        32          4  2020-01-11   \n",
       "21     动画       3805       美国        34          7  2019-11-22   \n",
       "22     动画       3673       中国        36         13        None   \n",
       "23     剧情       3663       美国        33         10  2020-08-25   \n",
       "24     动作       3505       美国        30          5  2020-08-14   \n",
       "\n",
       "                                             director  \n",
       "0                                   管虎 Hu Guan|12276/  \n",
       "1                              杨子 Larry Yang|2226264/  \n",
       "2                               柯汶利 Sam Quah|2354557/  \n",
       "3                          姚婷婷 Ting Ting Yao|2024527/  \n",
       "4                                叶伟信 Wilson Yip|2230/  \n",
       "5                      克里斯·哥伦布 Chris Columbus|538101/  \n",
       "6                                 杨乐 Le Yang|1871089/  \n",
       "7                   克里斯托弗·诺兰 Christopher Nolan|66379/  \n",
       "8                                安战军 Zhanjun An|2475/  \n",
       "9   特洛伊·奎安 Troy Quane|511866/尼克·布鲁诺 Nick Bruno|114...  \n",
       "10                          萨姆·门德斯 Sam Mendes|372050/  \n",
       "11                    罗伯托·贝尼尼 Roberto Benigni|134559/  \n",
       "12                   里克·罗曼·沃夫 Ric Roman Waugh|227389/  \n",
       "13                          徐展雄 Xu Zhanxiong|2181448/  \n",
       "14                        藤田春香 Fujita Haruka|2399895/  \n",
       "15                 约翰内斯·罗伯茨 Johannes Roberts|2248427/  \n",
       "16  陈凯歌 Kaige Chen|403/张一白 Yibai Zhang|27/管虎 Hu Gu...  \n",
       "17                     刘露 Lu Liu|2270668/周申 |2179646/  \n",
       "18                        戴夫·威尔逊 Dave Wilson|2368991/  \n",
       "19                      丹·斯坎隆 Dan·ShiKanLong|2022077/  \n",
       "20  张志一 Zhiyi Zhang|2303433/纪约姆·伊弗奈 Guillaume Iver...  \n",
       "21  珍妮弗·李 Jennifer Lee|2189071/克里斯·巴克 Chris Buck|4...  \n",
       "22        丁亮 Leon Ding|2349150/邵和麒 Heqi Shao|2228948/  \n",
       "23                       格蕾塔·葛韦格 Greta Gerwig|240367/  \n",
       "24  阿迪尔·埃尔·阿比 Adil El Arbi|2369998/比拉勒·法拉赫 Bilall ...  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'name': names,\n",
    "    'hrefs': hrefs,\n",
    "    'type': genre_main,\n",
    "    'boxoffice': boxoffice,\n",
    "    'area': area,\n",
    "    'avgprice': avgprice,\n",
    "    'avgpepole': avgpepole,\n",
    "    'releasetime': releasetime,\n",
    "    'director': directors\n",
    "})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>hrefs</th>\n",
       "      <th>type</th>\n",
       "      <th>boxoffice</th>\n",
       "      <th>area</th>\n",
       "      <th>avgprice</th>\n",
       "      <th>avgpepole</th>\n",
       "      <th>releasetime</th>\n",
       "      <th>director</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>八佰</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>战争</td>\n",
       "      <td>206778</td>\n",
       "      <td>中国</td>\n",
       "      <td>38</td>\n",
       "      <td>31</td>\n",
       "      <td>2020-08-21</td>\n",
       "      <td>管虎 Hu Guan|12276/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>宠爱</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>剧情</td>\n",
       "      <td>51030</td>\n",
       "      <td>中国</td>\n",
       "      <td>35</td>\n",
       "      <td>8</td>\n",
       "      <td>2019-12-31</td>\n",
       "      <td>杨子 Larry Yang|2226264/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>误杀</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>剧情</td>\n",
       "      <td>49929</td>\n",
       "      <td>中国</td>\n",
       "      <td>33</td>\n",
       "      <td>8</td>\n",
       "      <td>2019-12-13</td>\n",
       "      <td>柯汶利 Sam Quah|2354557/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>我在时间尽头等你</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>爱情</td>\n",
       "      <td>43905</td>\n",
       "      <td>中国</td>\n",
       "      <td>35</td>\n",
       "      <td>22</td>\n",
       "      <td>2020-08-25</td>\n",
       "      <td>姚婷婷 Ting Ting Yao|2024527/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>叶问4：完结篇</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>动作</td>\n",
       "      <td>41837</td>\n",
       "      <td>中国/中国香港</td>\n",
       "      <td>36</td>\n",
       "      <td>7</td>\n",
       "      <td>2019-12-20</td>\n",
       "      <td>叶伟信 Wilson Yip|2230/</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       name                                              hrefs type  \\\n",
       "0        八佰  https://www.endata.com.cn/BoxOffice/MovieStock...   战争   \n",
       "1        宠爱  https://www.endata.com.cn/BoxOffice/MovieStock...   剧情   \n",
       "2        误杀  https://www.endata.com.cn/BoxOffice/MovieStock...   剧情   \n",
       "3  我在时间尽头等你  https://www.endata.com.cn/BoxOffice/MovieStock...   爱情   \n",
       "4   叶问4：完结篇  https://www.endata.com.cn/BoxOffice/MovieStock...   动作   \n",
       "\n",
       "   boxoffice     area  avgprice  avgpepole releasetime  \\\n",
       "0     206778       中国        38         31  2020-08-21   \n",
       "1      51030       中国        35          8  2019-12-31   \n",
       "2      49929       中国        33          8  2019-12-13   \n",
       "3      43905       中国        35         22  2020-08-25   \n",
       "4      41837  中国/中国香港        36          7  2019-12-20   \n",
       "\n",
       "                     director  \n",
       "0           管虎 Hu Guan|12276/  \n",
       "1      杨子 Larry Yang|2226264/  \n",
       "2       柯汶利 Sam Quah|2354557/  \n",
       "3  姚婷婷 Ting Ting Yao|2024527/  \n",
       "4        叶伟信 Wilson Yip|2230/  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 数据存储\n",
    "df.to_csv('movies.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">boxoffice</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>type</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>剧情</th>\n",
       "      <td>8</td>\n",
       "      <td>16959.375</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>动作</th>\n",
       "      <td>4</td>\n",
       "      <td>13850.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>动画</th>\n",
       "      <td>5</td>\n",
       "      <td>5333.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>喜剧</th>\n",
       "      <td>2</td>\n",
       "      <td>9077.500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>战争</th>\n",
       "      <td>2</td>\n",
       "      <td>106827.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>灾难</th>\n",
       "      <td>1</td>\n",
       "      <td>4752.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>爱情</th>\n",
       "      <td>1</td>\n",
       "      <td>43905.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>科幻</th>\n",
       "      <td>1</td>\n",
       "      <td>12141.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>魔幻/动作</th>\n",
       "      <td>1</td>\n",
       "      <td>18665.000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      boxoffice            \n",
       "          count        mean\n",
       "type                       \n",
       "剧情            8   16959.375\n",
       "动作            4   13850.000\n",
       "动画            5    5333.000\n",
       "喜剧            2    9077.500\n",
       "战争            2  106827.000\n",
       "灾难            1    4752.000\n",
       "爱情            1   43905.000\n",
       "科幻            1   12141.000\n",
       "魔幻/动作         1   18665.000"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = df.groupby('type').agg({'boxoffice': ['count', 'mean']})\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>name</th>\n",
       "      <th>hrefs</th>\n",
       "      <th>type</th>\n",
       "      <th>boxoffice</th>\n",
       "      <th>area</th>\n",
       "      <th>avgprice</th>\n",
       "      <th>avgpepole</th>\n",
       "      <th>releasetime</th>\n",
       "      <th>director</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>八佰</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>战争</td>\n",
       "      <td>206778</td>\n",
       "      <td>中国</td>\n",
       "      <td>38</td>\n",
       "      <td>31</td>\n",
       "      <td>2020-08-21</td>\n",
       "      <td>管虎 Hu Guan|12276/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>宠爱</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>剧情</td>\n",
       "      <td>51030</td>\n",
       "      <td>中国</td>\n",
       "      <td>35</td>\n",
       "      <td>8</td>\n",
       "      <td>2019-12-31</td>\n",
       "      <td>杨子 Larry Yang|2226264/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>误杀</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>剧情</td>\n",
       "      <td>49929</td>\n",
       "      <td>中国</td>\n",
       "      <td>33</td>\n",
       "      <td>8</td>\n",
       "      <td>2019-12-13</td>\n",
       "      <td>柯汶利 Sam Quah|2354557/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>我在时间尽头等你</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>爱情</td>\n",
       "      <td>43905</td>\n",
       "      <td>中国</td>\n",
       "      <td>35</td>\n",
       "      <td>22</td>\n",
       "      <td>2020-08-25</td>\n",
       "      <td>姚婷婷 Ting Ting Yao|2024527/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>叶问4：完结篇</td>\n",
       "      <td>https://www.endata.com.cn/BoxOffice/MovieStock...</td>\n",
       "      <td>动作</td>\n",
       "      <td>41837</td>\n",
       "      <td>中国/中国香港</td>\n",
       "      <td>36</td>\n",
       "      <td>7</td>\n",
       "      <td>2019-12-20</td>\n",
       "      <td>叶伟信 Wilson Yip|2230/</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0      name                                              hrefs  \\\n",
       "0           0        八佰  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "1           1        宠爱  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "2           2        误杀  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "3           3  我在时间尽头等你  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "4           4   叶问4：完结篇  https://www.endata.com.cn/BoxOffice/MovieStock...   \n",
       "\n",
       "  type  boxoffice     area  avgprice  avgpepole releasetime  \\\n",
       "0   战争     206778       中国        38         31  2020-08-21   \n",
       "1   剧情      51030       中国        35          8  2019-12-31   \n",
       "2   剧情      49929       中国        33          8  2019-12-13   \n",
       "3   爱情      43905       中国        35         22  2020-08-25   \n",
       "4   动作      41837  中国/中国香港        36          7  2019-12-20   \n",
       "\n",
       "                     director  \n",
       "0           管虎 Hu Guan|12276/  \n",
       "1      杨子 Larry Yang|2226264/  \n",
       "2       柯汶利 Sam Quah|2354557/  \n",
       "3  姚婷婷 Ting Ting Yao|2024527/  \n",
       "4        叶伟信 Wilson Yip|2230/  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2 = pd.read_csv('movies.csv')\n",
    "df_2.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
